#爬虫入门第二章
#学者 小韦
#2022年6月28日 天气:阴 地点:湖南永州冷水滩区
import re

import requests
import json

# Exercise 1 (kept below as commented-out code): download a single image.
# User-Agent spoofing
# header={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
#                      'Chrome/101.0.0.0 Safari/537.36'}
# #请求url
# url='https://thispersondoesnotexist.com/image'
# #content返回的是二进制形式的图片数据
# img_data=requests.get(url=url,headers=header).content
#
# with open('./不存在的脸.jpg','wb') as fp:
#     fp.write(img_data)

# Regular-expression exercise: fetch a listing page and pull the image URLs and
# the detail-page link out of every list item.

# User-Agent spoofing: send a browser-like User-Agent header with the request.
header={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                     'Chrome/101.0.0.0 Safari/537.36'}
# URL of the listing page to request.
url='http://www.72qq.com/biaoqing/t/shadiao/'

# The timeout keeps the script from hanging forever on an unresponsive server;
# raise_for_status() aborts on an HTTP error instead of scanning an error page.
response=requests.get(url=url,headers=header,timeout=10)
response.raise_for_status()
img_data2=response.text
# Optional: save the raw page for inspection.
# with open('./沙雕.html','w',encoding='utf-8') as fp:
#     fp.write(img_data2)

# One match per <li class=""> item; the groups are
# (img src, img data-src, detail-page href).
# [^"]* keeps each capture inside its own attribute value. With the previous
# lazy (.*?) and re.S, the third group started at the first <a href="..."> after
# the image (a "javascript:;" vote link) and ran across tags up to the next
# '" class'. Anchoring on class="wz" selects the title link instead; this relies
# on the item markup recorded in the sample comment further down the file.
ex='<li class="">.*?<img src="([^"]*)" data-src="([^"]*)" alt.*?<a href="([^"]*)" class="wz".*?</li>'
img_src_list=re.findall(ex,img_data2,re.S)
print(img_src_list)

# <li class="">
#                             <a href="http://www.72qq.com/biaoqing/47769.html" target="_blank">
#                                 <img src="http://img.72qq.com/file/202204/08/5c48f2b163.jpg" data-src="http://img.72qq.com/file/202204/08/5c48f2b163.jpg" alt="超级好笑的冰墩墩沙雕聊天表情包" class="loaded" data-was-processed="true">
#                             </a>
#                             <p class="ms3 cl"><a href="javascript:;">8</a><a href="javascript:;" class="hate">0</a></p>
#                             <a href="http://www.72qq.com/biaoqing/47769.html" class="wz" target="_blank">超级好笑的冰墩墩沙雕聊天表情包</a>
#                             <div class="ms1 cl"><span>共<i>9</i>图</span>2022-04-08</div>
#                         </li>

#ex='<li class="">.*?<img src="(.*?)" data-src="(.*?)" alt.*?<a href="(.*?)" class.*?</li>'
